/*    Copyright 2010,2012 Tobias Marschall
 *
 *    This file is part of MoSDi.
 *
 *    MoSDi is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    MoSDi is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with MoSDi.  If not, see <http://www.gnu.org/licenses/>.
 */

package mosdi.subcommands;

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;

import mosdi.fa.FiniteMemoryTextModel;
import mosdi.fa.IIDTextModel;
import mosdi.fa.MarkovianTextModel;
import mosdi.util.Alphabet;
import mosdi.util.Log;
import mosdi.util.NamedSequence;
import mosdi.util.SequenceUtils;

/**
 * Subcommand "random-copy": reads template sequences from a FASTA file,
 * builds a text model from them and prints, for each template sequence that
 * passes the optional length filter, a random sequence of the same length to
 * standard output. If an annotation file is given, a second model is built
 * from the discretized annotation track and random annotations are written
 * to a separate output file.
 */
public class RandomCopySubcommand extends Subcommand {

	/** Returns the usage text listing the mandatory argument and all options. */
	@Override
	public String usage() {
		return
		super.usage()+" [options] <template.fa>\n" +
		"Options:\n" +
		"  -O <order>: model order (default: 1).\n" +
		"  -a <template.annotations.fa> To be used together with -A\n" +
		"  -A <annotations.output.fa> File to write annotations to (requires -a).\n" +
		"  -j: Use joint model for sequence and annotation to model (possible)\n" +
		"      dependencies between characters and annotations (requires -A and -a).\n" +
		"  -b <bincount> Number of bins to discretize scores.\n" +
		"  -p <pseudocounts>: Pseudocounts for estimation of models (default: 0.1).\n" +
		"  -n: keep sequence name, i.e. use same sequence labels as in original files\n" +
		"  -B <alphabet>: if either \"dna\" or \"protein\", the respective alphabet of\n" +
		"                 nucleotides or amino acids is used, respectively. Otherwise,\n" +
		"                 an alphabet consisting of the given characters is used (default:\"dna\").\n" +
		"  -i: ignore unknown characters\n" +
		"  -m <min-length>: minimum length of sequences to be produced\n" +
		"  -M <max-length>: maximum length of sequences to be produced\n";
	}

	/** Returns a one-sentence description of this subcommand. */
	@Override
	public String description() {
		return "Generates random sequences similar (in length, number and composition) to " +
				"given template sequences.";
	}

	/** Returns the name under which this subcommand is invoked. */
	@Override
	public String name() {
		return "random-copy";
	}

	/**
	 * Runs the subcommand.
	 *
	 * <p>Random sequences are printed to standard output in FASTA-like form
	 * (a {@code >name} line followed by the sequence). When annotations are
	 * requested, matching records with space-separated integer values are
	 * written to the annotation output file.</p>
	 *
	 * <p>Note that some failures terminate the JVM directly instead of
	 * returning: a failure to read the template file exits with status 1 and
	 * a failure to open or write the annotation output file exits with
	 * status -1.</p>
	 *
	 * @param args command line arguments of this subcommand
	 * @return 0 on success, 1 if the (not yet implemented) option -j was given
	 */
	@Override
	public int run(String[] args) {
		parseOptions(args, 1, "O:a:A:jb:p:nB:im:M:");

		// Option dependencies
		impliedOptions("a", "A");
		impliedOptions("A", "a");
		impliedOptions("j", "a");
		impliedOptions("b", "a");

		// Mandatory arguments
		String sequenceFilename = getStringArgument(0);

		// Options
		int modelOrder = getNonNegativeIntOption("O", 1);
		String annotationFilename = getStringOption("a", null);
		String annotationOutputFilename = getStringOption("A", null);
		boolean jointModel = getBooleanOption("j", false);
		int binCount = getPositiveIntOption("b", 11);
		double pseudoCounts = getDoubleOption("p", 0.1d);
		boolean keepLabels = getBooleanOption("n", false);
		String alphabetParameter = getStringOption("B", "dna");
		boolean ignoreUnknown = getBooleanOption("i", false);
		// A default of -1 means "no limit"; the filters below only apply to positive values.
		int minLength = getPositiveIntOption("m", -1);
		int maxLength = getPositiveIntOption("M", -1);

		Alphabet alphabet;
		if (alphabetParameter.equals("dna")) {
			alphabet = Alphabet.getDnaAlphabet();
			alphabet.setSeparator('$');
		} else if (alphabetParameter.equals("protein")) {
			alphabet = Alphabet.getAminoAcidAlphabet();
		} else {
			alphabet = new Alphabet(alphabetParameter);
		}

		if (jointModel) {
			Log.errorln("Option -j is not implemented yet.");
			return 1;
		}

		Log.setTimingActive(true);
		Log.setLogLevel(Log.Level.VERBOSE);

		List<NamedSequence> namedSequences = null;
		try {
			namedSequences = SequenceUtils.readFastaFile(sequenceFilename, alphabet, ignoreUnknown);
		} catch (Exception e) {
			Log.errorln(e.toString());
			System.exit(1);
		}

		// Collect the raw sequences (and, if requested, the annotation tracks)
		// as the input from which the models are built.
		List<int[]> sequences = new ArrayList<int[]>(namedSequences.size());
		List<int[]> annotations = null;
		if (annotationFilename!=null) {
			SequenceUtils.readAnnotationTrack("conservation", annotationFilename, namedSequences, binCount);
			annotations = new ArrayList<int[]>(namedSequences.size());
		}
		for (NamedSequence ns : namedSequences) {
			sequences.add(ns.getSequence());
			if (annotations!=null) annotations.add(ns.getAnnotationTrack("conservation"));
		}

		// Order 0 uses the i.i.d. model, any higher order the Markovian model.
		// The annotation model works on the alphabet of bin indices.
		FiniteMemoryTextModel textModel;
		FiniteMemoryTextModel annotationModel = null;
		if (modelOrder==0) {
			textModel = new IIDTextModel(alphabet.size(), sequences);
			if (annotations!=null) annotationModel = new IIDTextModel(binCount, annotations);
		} else {
			textModel = new MarkovianTextModel(modelOrder, alphabet.size(), sequences, pseudoCounts);
			if (annotations!=null) annotationModel = new MarkovianTextModel(modelOrder, binCount, annotations, pseudoCounts);
		}

		try {
			PrintStream annotationPS = null;
			if (annotations!=null) {
				annotationPS = new PrintStream(new FileOutputStream(annotationOutputFilename));
			}
			try {
				// Counts only the sequences actually emitted, so that the
				// generated labels random0, random1, ... are consecutive.
				int n = 0;
				for (NamedSequence ns : namedSequences) {
					if ((minLength>0) && (ns.length() < minLength)) continue;
					if ((maxLength>0) && (ns.length() > maxLength)) continue;
					String name = keepLabels?ns.getName():("random"+n);
					System.out.println(">"+name);
					System.out.println(alphabet.buildString(textModel.generateRandomText(ns.length())));
					if (annotationPS!=null) {
						annotationPS.println(">"+name);
						int[] random = annotationModel.generateRandomText(ns.length());
						for (int x : random) {
							annotationPS.print(x);
							annotationPS.print(" ");
						}
						annotationPS.println();
					}
					n+=1;
				}
				// PrintStream never throws on write errors; it only records
				// them. Ask explicitly so that a failed write is reported
				// instead of silently producing a truncated file.
				if ((annotationPS!=null) && annotationPS.checkError()) {
					throw new IOException("Error while writing to "+annotationOutputFilename);
				}
			} finally {
				// Close the file even if sequence generation fails with an
				// unchecked exception.
				if (annotationPS!=null) annotationPS.close();
			}
		}
		catch (IOException e) {
			System.err.println("Unable to write to file "+annotationOutputFilename);
			System.exit(-1);
		}
		return 0;
	}
}
